********************************************************************************
********************************************************************************
* This do file replicates all Online Appendix figures and tables of the field
* experiment: Table A2, Table A4, Table G1, Figure G1, and Figure H1.
********************************************************************************
********************************************************************************

*******************************************************************************
* Table A2. The effect of language proficiency on the callback rate,
* employer survey sample.
*******************************************************************************

* Load the field-experiment data.
use Data/data_field_experiment_JHR, clear

* Control variables entered in both regressions below. Kept as a global so
* the same list can be expanded with $resume_X in each command.
global resume_X ///
    ccity1 ccity2 ///
    ccountry1 ccountry2 ccountry3 ///
    occ_mekaniker-occ_kundtjänstpersonal ///
    occ_personligassistent-occ_supporttekniker ///
    aappl_type2 aappl_type3 ///
    aappl_order2 aappl_order3

* Specification 1: callback on language_level plus controls, all observations,
* heteroskedasticity-robust standard errors.
regress callback language_level $resume_X, vce(robust)

* Specification 2: same model, restricted to observations where
* Lang_important is not the system missing value (presumably the employer
* survey sample named in the section title -- confirm).
regress callback language_level $resume_X if Lang_important != ., vce(robust)

*******************************************************************************
* Table A4. The effect of language proficiency on the callback rate,
* by stating a literacy skill requirement in the job advertisement (or not).
*******************************************************************************
* Reload the data so this section can be run on its own.
use Data/data_field_experiment_JHR, clear

* Control variables used in the regression below.
global resume_X ccity1 ccity2 ccountry1 ccountry2 ccountry3 occ_mekaniker-occ_kundtjänstpersonal occ_personligassistent-occ_supporttekniker aappl_type2 aappl_type3 aappl_order2 aappl_order3

* Three mutually exclusive indicators built from language_req_mentioned:
* equal to 1, equal to 0, and system missing.
gen Lang_req = language_req_mentioned == 1
gen Not_Lang_req = language_req_mentioned == 0
gen Lang_req_miss = language_req_mentioned == .

* Interact language_level with each indicator so the regression estimates a
* separate language_level slope for each of the three groups.
gen llevel_Lang_req = language_level * Lang_req
gen llevel_Not_Lang_req = language_level * Not_Lang_req
gen llevel_Lang_req_miss = language_level * Lang_req_miss

* Not_Lang_req is the omitted group for the level effects.
* NOTE(review): the original author left the remark "3112 vs 3153 ??" on this
* regression -- presumably an unresolved observation-count discrepancy; confirm.
reg callback llevel_Lang_req llevel_Not_Lang_req llevel_Lang_req_miss Lang_req_miss Lang_req $resume_X, robust /* ! 3112 vs 3153 ?? */

* Test equality of the language_level slopes with and without a stated
* requirement, and report the rounded p-value (previously stored but never shown).
test llevel_Not_Lang_req = llevel_Lang_req
local pvalue = round(r(p), .0001)
display as text "Table A4: p-value, llevel_Not_Lang_req = llevel_Lang_req: " as result %6.4f `pvalue'

******************************************************************************
* Table G1. Comparisons of means of resume characteristics across treatments.
******************************************************************************

* Builds a .docx balance table with one row per indicator variable: number of
* observations with the indicator equal to 1, callback mean among those
* observations, the indicator's mean in each L3L4 group, the difference of the
* two (rounded) means, and the exact-test p-value from tabulate.
use Data/data_field_experiment_JHR, clear

* Expand the variable list once so the table can be sized to fit it exactly
* (the previous fixed 60-row table would fail with more than 59 variables and
* leave empty rows with fewer).
unab balance_vars : ccity* ccountry* high_skill medium_skill low_skill ///
    occ_mekaniker-occ_personligassistent occ_butikssäljare-occ_supporttekniker ///
    aappl_order* aappl_type*
local n_vars : word count `balance_vars'

* Discard any document left open by an aborted earlier run, then start fresh.
capture putdocx clear
putdocx begin

* Initialize a table with headers (one header row + one row per variable)
putdocx table balance_table = (`=`n_vars' + 1', 7)
putdocx table balance_table(1, 1) = ("Variable")
putdocx table balance_table(1, 2) = ("Num obs")
putdocx table balance_table(1, 3) = ("Callback Mean")
putdocx table balance_table(1, 4) = ("L3L4_0")
putdocx table balance_table(1, 5) = ("L3L4_1")
putdocx table balance_table(1, 6) = ("Diff")
putdocx table balance_table(1, 7) = ("p-value")

local row = 1
foreach X of local balance_vars {

    local row = `row' + 1

    * Mean of callback among observations where X equals 1
    summarize callback if `X' == 1, meanonly
    local callback_mean = round(r(mean), 0.01)

    * Mean of X within each L3L4 group
    summarize `X' if L3L4 == 0, meanonly
    local mean_L34_0 = round(r(mean), 0.01)

    summarize `X' if L3L4 == 1, meanonly
    local mean_L34_1 = round(r(mean), 0.01)

    * Difference of the rounded group means, so the Diff column agrees with
    * the two displayed mean columns
    local diff = `mean_L34_0' - `mean_L34_1'

    * Exact test of association between L3L4 and X
    tabulate L3L4 `X', exact
    local p_value = round(r(p_exact), 0.001)

    * Conventional significance stars: *** p<0.01, ** p<0.05, * p<0.10.
    * A missing p-value compares as larger than any number, so it gets none.
    local stars ""
    if `p_value' < 0.01 {
        local stars "***"
    }
    else if `p_value' < 0.05 {
        local stars "**"
    }
    else if `p_value' < 0.10 {
        local stars "*"
    }

    count if `X' == 1
    local num_obs = r(N)

    * Fixed display formats avoid floating-point noise in the table cells
    local callback_fmt = string(`callback_mean', "%4.2f")
    local mean0_fmt    = string(`mean_L34_0', "%4.2f")
    local mean1_fmt    = string(`mean_L34_1', "%4.2f")
    local diff_fmt     = string(`diff', "%4.2f")
    local p_fmt        = string(`p_value', "%5.3f")

    di "Adding row number: `row'"
    putdocx table balance_table(`row', 1) = ("`X'")
    putdocx table balance_table(`row', 2) = ("`num_obs'")
    putdocx table balance_table(`row', 3) = ("`callback_fmt'")
    putdocx table balance_table(`row', 4) = ("`mean0_fmt'")
    putdocx table balance_table(`row', 5) = ("`mean1_fmt'")
    putdocx table balance_table(`row', 6) = ("`diff_fmt'")
    putdocx table balance_table(`row', 7) = ("`p_fmt'`stars'")
}
putdocx save "Stata output/balance_table.docx", replace

******************************************************************************
* Figure G1. Distribution of the number of significant differences in means.
******************************************************************************

* Simulates the balance check many times: each iteration regenerates the
* application data via the two simulation do-files, runs the same exact test
* of L3L4 against every indicator variable, and records how many tests have a
* (rounded) p-value below 0.10. The distribution of that count is then plotted.
clear all
set matsize 10000

* Fix the random-number seed so the simulated distribution is reproducible.
* NOTE(review): the seed value is arbitrary; substitute the seed used for the
* published figure if it is known. The simulation do-files are not visible
* here -- if they set their own seed, this line is redundant.
set seed 20200218

local num_applications 3000 /* Same size as in experiment */
local num_iterations 1000   /* Number of iterations */

tempfile results
* Release the handle if an earlier, aborted run left it open; otherwise
* postfile would fail because the name is already in use.
capture postclose handle
postfile handle significant_count using `results', replace

forval i = 1/`num_iterations' {
    * Generate new data
    clear
    qui do 2_1_data_applications_simulation_balance `num_applications'
    qui do 2_2_data_applications_simulation_balance_create_X_vars

    * Counter for significant tests
    local significant_count = 0

    foreach X of varlist ccity* ccountry* high_skill medium_skill low_skill occ_mekaniker-occ_personligassistent occ_butikssäljare-occ_supporttekniker aappl_order* aappl_type* {
        * Perform the exact test using tabulate; the p-value is rounded to
        * three decimals before the comparison, as in the balance table
        qui tabulate L3L4 `X', exact
        local p_value = round(r(p_exact), 0.001)

        * Check if the p-value is significant at the 10% level
        if `p_value' < 0.10 {
            local significant_count = `significant_count' + 1
        }
    }
    * Store the significant count for this iteration
    post handle (`significant_count')

    * Display a message every 10th iteration
    if mod(`i', 10) == 0 {
        display "Completed iteration `i' of `num_iterations'"
    }
}

postclose handle

use `results', clear

* Share of iterations with at least 8 significant differences
gen gr_8_significant = significant_count >= 8
tab gr_8_significant

* Relative frequency of each count. _N is the number of iterations stored;
* using it directly avoids depending on r(N) from an unrestricted summarize.
egen freq = count(significant_count), by(significant_count)
summarize significant_count
gen fraction = freq / _N

* Keep one observation per distinct count for plotting
bysort significant_count: keep if _n == 1

graph bar (mean) fraction, over(significant_count) ///
    ytitle("Probability") ///
    bar(1, color(white) lcolor(black)) graphregion(color(white))
	
******************************************************************************
* Figure H1. Callback rate over time in field experiment.
******************************************************************************

* Load the field-experiment data.
use Data/data_field_experiment_JHR, clear

* Width of each time bin in days, and the reference date from which bins are
* counted (held in a local; it is only needed to compute the bin number).
local period_days = 45
local start_date = date("2/18/2020", "MDY")

* Convert the month/day/year string date_collected into a Stata daily date.
gen date_collected_2 = date(date_collected, "MDY")
format date_collected_2 %td

* Bin number: 1 for the first `period_days' days from the reference date,
* 2 for the next `period_days' days, and so on.
gen period_`period_days' = floor((date_collected_2 - `start_date') / `period_days') + 1

* One observation per bin: callback rate, number of non-missing callback
* values, and mean collection date (used as the x-axis position).
collapse ///
    (mean)  callback_`period_days'         = callback ///
    (count) num_applications_`period_days' = callback ///
    (mean)  date_collected_2_`period_days' = date_collected_2, ///
    by(period_`period_days')

* Figure H1. Callback rate over time in field experiment.
twoway connected callback_`period_days' date_collected_2_`period_days', ///
    lcolor(black) mcolor(black) ///
    ytitle("Callback rate") xtitle("") title("") ///
    ylabel(,nogrid) ylabel(0.0(0.05)0.35) yscale(range(0.0(0.05)0.35)) ///
    tlabel(, format(%tdmy)) ///
    legend(off) graphregion(color(gs16) )

